Let's review the MLB draft season.

Start by loading the packages

suppressMessages({
  library(tidyverse) #ggplot2 dplyr tibble tidyr purrr forecats 
  library(ggrepel) #automatically position non-overlapping text labels
  library(glue) #interpreted literal strings
  library(gt)
  library(gtExtras)
  library(paletteer)
  library(mlbplotR)
})

Next we load the team logos

# Team lookup table from mlbplotR with the "AL", "NL" and "MLB" entries
# removed, plus four helper columns.
teams_colors_logos <- mlbplotR::load_mlb_teams() %>%
  filter(!(team_abbr %in% c("AL", "NL", "MLB"))) %>%
  mutate(
    # a cycles through 1..6 five times; b runs from 5 down to 1 in blocks of
    # six. Both are 30 values long, so exactly 30 rows are expected here.
    a = rep(1:6, times = 5),
    b = rep(5:1, each = 6),
    # alpha is 1 for abbreviations containing "A" and 0.75 otherwise.
    alpha = ifelse(grepl("A", team_abbr), 1, 0.75),
    # color is "b/w" for abbreviations containing "E" and NA otherwise.
    color = ifelse(grepl("E", team_abbr), "b/w", NA)
  )

Now let's load our draft data and clean it up.

# Load the raw draft export.
exposure <- read.csv("./data/exposure_mar20.csv")

# Clean the picks and enrich them with team logos, player ids, headshots and
# ranking data, then derive the value metrics used in the tables below.
exposure <- exposure %>%
  mutate(
    # Parse the UTC timestamp and keep only the calendar date. The column is
    # referenced directly (not via `exposure$Picked.At`) so the pipeline does
    # not depend on the global object it is about to overwrite.
    Picked.At = as.Date(
      as.POSIXct(Picked.At, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
    ),
    name = paste(First.Name, Last.Name)
  ) %>%
  select(name, Team, Position, Picked.At, Pick.Number, Draft) %>%
  # Attach the ESPN logo column for each team abbreviation.
  left_join(
    teams_colors_logos %>% select(team_abbr, team_logo_espn),
    by = c("Team" = "team_abbr")
  ) %>%
  # Look up player ids by full name.
  left_join(read.csv("./data/playerids.csv"), by = c("name" = "Name")) %>%
  mutate(playerid = as.double(playerid)) %>%
  # The name join can match more than one id row; keep the first row for each
  # player within a draft.
  distinct(name, Draft, .keep_all = TRUE) %>%
  # Headshot image column, matched on the FanGraphs id.
  left_join(
    mlbplotR::load_headshots() %>%
      select(fangraphs_id, espn_headshot) %>%
      drop_na(fangraphs_id),
    by = c("playerid" = "fangraphs_id")
  ) %>%
  # Ranking data (ADP, projected points, position rank), matched on full name.
  left_join(
    read.csv("./projections_season/rankings_mar20.csv") %>%
      mutate(
        name = paste(firstName, lastName),
        # Values that cannot be parsed as numbers become NA here.
        adp = as.numeric(adp)
      ) %>%
      select(name, adp, projectedPoints, positionRank),
    by = "name"
  ) %>%
  mutate(
    # Positive value: the player was picked later than his ADP.
    value = Pick.Number - adp,
    rel_value = round(value / adp, digits = 2),
    # Keep only the capital letters of the position rank, e.g. "OF12" -> "OF".
    positionGroup = gsub("[^A-Z]", "", positionRank)
  ) %>%
  # Picks without a usable ADP (no ranking match or unparsable value) are
  # removed.
  drop_na(adp) %>%
  arrange(Pick.Number)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Highest owned players

# Ten most frequently drafted players. `own` is the pick count divided by the
# number of distinct drafts, rounded to two decimals.
exposure %>%
  group_by(name, espn_headshot) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  arrange(desc(count)) %>%
  mutate(own = round(count / n_distinct(exposure$Draft), digits = 2)) %>%
  slice_head(n = 10) %>%
  gt() %>%
  gt_img_rows(columns = espn_headshot, height = 50) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'name'. You can override using the
## `.groups` argument.
name espn_headshot count own
Mark Canha 16 0.28
J.D. Martinez 15 0.26
Jake Cronenworth 13 0.23
Aaron Nola 12 0.21
Brandon Woodruff 12 0.21
Logan Webb 12 0.21
Lourdes Gurriel Jr. 12 0.21
Xander Bogaerts 12 0.21
Alek Manoah 11 0.19
Andrew McCutchen 11 0.19

Let's look at the drafts grouped by date.

# Per-date summary: number of picks, summed value and summed relative value,
# plus the per-pick averages rounded to two decimals.
drafts_by_date <- exposure %>%
  group_by(Picked.At) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value, na.rm = TRUE),
    total_rel_value = sum(rel_value, na.rm = TRUE),
    # These two build on the totals computed just above.
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2)
  )

gt(drafts_by_date)
Picked.At total_picks total_value total_rel_value value_per_pick rel_value_per_pick
2023-02-19 20 -21.0 0.25 -1.05 0.01
2023-02-21 172 -399.5 -0.65 -2.32 0.00
2023-02-22 111 232.2 7.02 2.09 0.06
2023-02-23 177 -26.8 0.50 -0.15 0.00
2023-02-25 120 118.6 3.09 0.99 0.03
2023-02-26 20 -29.3 0.04 -1.46 0.00
2023-02-27 39 -99.8 0.69 -2.56 0.02
2023-02-28 60 -8.4 0.42 -0.14 0.01
2023-03-01 20 35.1 0.78 1.76 0.04
2023-03-02 20 -13.2 0.10 -0.66 0.00
2023-03-04 40 29.2 0.56 0.73 0.01
2023-03-05 20 107.0 1.36 5.35 0.07
2023-03-06 20 32.6 -0.33 1.63 -0.02
2023-03-08 80 -27.1 0.22 -0.34 0.00
2023-03-09 20 -42.3 0.92 -2.11 0.05
2023-03-11 7 21.0 0.50 3.00 0.07
2023-03-12 57 67.8 0.56 1.19 0.01
2023-03-13 16 -84.2 -0.50 -5.26 -0.03
2023-03-19 80 239.2 2.20 2.99 0.03
2023-03-20 40 -32.9 -0.07 -0.82 0.00

Top 10 picks from all drafts in terms of relative value (pick number minus ADP, divided by ADP)

# Ten picks with the highest relative value across all drafts, displayed with
# the team logo and player headshot rendered as images.
exposure %>%
  arrange(desc(rel_value)) %>%
  slice_head(n = 10) %>%
  select(
    name, team_logo_espn, espn_headshot,
    Pick.Number, adp, value, rel_value, Picked.At
  ) %>%
  gt() %>%
  gt_img_rows(columns = "team_logo_espn", height = 50) %>%
  gt_img_rows(columns = "espn_headshot", height = 50)
name team_logo_espn espn_headshot Pick.Number adp value rel_value Picked.At
Aaron Judge 4 1.2 2.8 2.33 2023-02-22
Ronald Acuña Jr. 6 2.9 3.1 1.07 2023-03-09
Aaron Judge 2 1.2 0.8 0.67 2023-02-21
Juan Soto 4 2.5 1.5 0.60 2023-02-27
Julio Rodríguez 8 5.4 2.6 0.48 2023-02-22
Jarred Kelenic 225 153.5 71.5 0.47 2023-02-23
Shohei Ohtani 7 4.8 2.2 0.46 2023-02-22
Jarred Kelenic 221 153.5 67.5 0.44 2023-03-04
Adolis García 58 40.6 17.4 0.43 2023-03-05
Fernando Tatis Jr. 28 19.9 8.1 0.41 2023-02-22

Picks by MLB team (top 10)

# Ten MLB teams with the most picks; the logo column is renamed to `team` and
# rendered as an image.
exposure %>%
  group_by(Team, team_logo_espn) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  arrange(desc(count)) %>%
  slice_head(n = 10) %>%
  rename(team = team_logo_espn) %>%
  gt() %>%
  gt_img_rows(columns = team) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Team'. You can override using the
## `.groups` argument.
Team team count
NYM 80
SD 78
STL 76
ATL 66
LAD 66
LAA 56
NYY 49
TB 49
HOU 45
MIL 45

Picks by position

# Pick count per position, most common first. `own` is each position's share
# of all picks, rounded to two decimals.
exposure %>%
  group_by(Position) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(own = round(count / sum(count), digits = 2)) %>%
  gt()
Position count own
SP 338 0.30
RF 131 0.12
LF 130 0.11
3B 129 0.11
1B 92 0.08
CF 86 0.08
SS 86 0.08
2B 69 0.06
C 45 0.04
DH 33 0.03

Number of drafts in which each MLB team had at least one player picked

# For each MLB team, count the drafts in which at least one of its players was
# picked, most frequent first.
exposure %>%
  # First collapse to one row per (draft, team) pair ...
  group_by(Draft, Team) %>%
  summarise(count = n()) %>%
  # ... then count how many drafts each team appears in.
  group_by(Team) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  gt() %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
Team count
NYM 36
SD 35
STL 34
ATL 33
LAD 32
LAA 29
SF 29
TB 29
MIL 28
TOR 27
HOU 26
NYY 26
CLE 25
MIN 25
PHI 25
BOS 23
SEA 23
BAL 21
KC 21
ARI 19
MIA 19
CWS 17
TEX 17
PIT 15
CHC 12
COL 11
DET 8
CIN 5
WSH 2
OAK 1

Creating objects to merge to the drafts dataframe

# Order in which the position-group counts are written into `config`.
ord <- c("P", "IF", "OF")

# One row per draft. `config` is the number obtained by writing the pick counts
# of the position groups side by side in `ord` order, e.g. 6, 7, 7 -> 677.
exposure_config <- exposure %>%
  group_by(Draft, positionGroup) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  # Sort the groups within each draft by their position in `ord`.
  arrange(Draft, match(positionGroup, ord)) %>%
  group_by(Draft) %>%
  summarise(config = as.numeric(paste0(count, collapse = "")))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Number of drafts sharing each configuration, most common first.
exposure_config %>%
  count(config, name = "count", sort = TRUE)
## # A tibble: 8 × 2
##   config count
##    <dbl> <int>
## 1    677    20
## 2    686    13
## 3    587    11
## 4    776     6
## 5    578     3
## 6    767     2
## 7    586     1
## 8    965     1
# Per draft, the total number of non-pitchers that belong to a team with more
# than one non-pitcher picked in that draft. Largest totals first.
exposure_batters <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  summarise(team_batters = n()) %>%
  ungroup() %>%
  # Only teams contributing at least two batters count as a stack.
  filter(team_batters > 1) %>%
  group_by(Draft) %>%
  summarise(batters = sum(team_batters)) %>%
  arrange(desc(batters))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Per draft, the largest number of non-pitchers picked from a single team.
exposure_big_stack <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  summarise(team_batters = n()) %>%
  ungroup() %>%
  group_by(Draft) %>%
  summarise(big_stack = max(team_batters))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Per draft, the number of teams from which more than one non-pitcher was
# picked.
exposure_num_teams <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  summarise(team_batters = n()) %>%
  ungroup() %>%
  filter(team_batters > 1) %>%
  group_by(Draft) %>%
  summarise(teams_stacked = n())
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Player taken with an overall pick number below 13 in each draft.
# NOTE(review): presumably this assumes 12-team drafts, so that exactly one
# such pick exists per draft - confirm.
first_pick <- exposure %>%
  filter(Pick.Number < 13) %>%
  select(first_pick = name, Draft)

# One row per draft: pick totals, per-pick value metrics, roster configuration,
# stacking summaries and first pick, plus a descriptive label in `file`.
# Rows are ordered by relative value per pick, best first.
drafts <- exposure %>%
  group_by(Draft) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value),
    total_rel_value = sum(rel_value),
    # Date taken from the last row of each draft.
    Picked.At = last(Picked.At)
  ) %>%
  mutate(
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2)
  ) %>%
  arrange(desc(rel_value_per_pick)) %>%
  left_join(exposure_config, by = "Draft") %>%
  left_join(exposure_batters, by = "Draft") %>%
  left_join(exposure_big_stack, by = "Draft") %>%
  left_join(exposure_num_teams, by = "Draft") %>%
  left_join(first_pick, by = "Draft") %>%
  # A draft with no multi-batter team has no row in the stack summaries, so the
  # joins leave NA. Record that as zero instead of letting the literal text
  # "NA" end up in the label built below.
  replace_na(list(batters = 0L, big_stack = 0L, teams_stacked = 0L)) %>%
  mutate(file = paste(config, teams_stacked, batters, big_stack, first_pick))
# Split the picks into one data frame per draft, named by the draft id.
exp_list <- split(exposure, exposure$Draft)

# Lookup from draft id to the descriptive label built in `drafts`.
name_mapping <- data.frame(
  old_names = drafts$Draft,
  new_names = drafts$file,
  stringsAsFactors = FALSE
)

# Position of each list name in the lookup; NA when a draft is not listed.
name_indices <- match(names(exp_list), name_mapping$old_names)

# Use the label wherever a match exists; unmatched names are left unchanged.
names(exp_list) <- ifelse(
  is.na(name_indices),
  names(exp_list),
  name_mapping$new_names[name_indices]
)